Detect chromosomal duplications

Author

Claudia Zirión-Martínez

Published

February 13, 2025

Setup

Libraries

Code
library(tidyverse)
library(ggtree)
library(ggtreeExtra)
library(ape)
library(ggnewscale)
library(RColorBrewer)
library(svglite)
source("scripts/metadata_colors.R")

Paths

Code
# Files read by the chunks below ----
metadata_path <- "data/processed/metadata_ashton_desj_all_weavepop_H99.csv"
duplications_path <- "results/tables/duplications.tsv"
merged_tree_path <- "data/processed/tree_merged.newick"

# Figure paths under results/trees_dups ----
# NOTE(review): none of these are used in the visible chunks; presumably the
# folded plotting chunks write to them -- confirm.
tree_merged_duplications_path <-
  "results/trees_dups/tree_merged_duplications.png"
tree_merged_duplications_12_13_path <-
  "results/trees_dups/tree_merged_duplications_12_13.png"
tree_merged_duplications_only_duplicated <-
  "results/trees_dups/tree_merged_duplications_only_duplicated.png"
tree_merged_duplications_only_duplicated2 <-
  "results/trees_dups/tree_merged_duplications_only_duplicated2.png"
tree_merged_duplications_only_duplicated3 <-
  "results/trees_dups/tree_merged_duplications_only_duplicated3.png"
tree_merged_duplications_only_duplicated4 <-
  "results/trees_dups/tree_merged_duplications_only_duplicated4.svg"

Metadata

Load the necessary data

Code
# Read the strain metadata table (CSV whose first row holds the column names).
metadata <- read.csv(file = metadata_path, header = TRUE)

Build one data frame per metadata variable (row names = strain), so that each can be plotted as a separate annotation column next to the tree

Code
# Fix the order of the VNI subdivision levels. Any value not listed here
# becomes NA when the column is converted.
metadata[["vni_subdivision"]] <- factor(
  metadata[["vni_subdivision"]],
  levels = c(
    "VNIa-4", "VNIa-5", "VNIa-32",
    "VNIa-93", "VNIa-X", "VNIa-Y", "VNIb",
    "VNIc", "VNIa-outlier"
  )
)

# One single-column data frame per variable, with the strain as row name.

# Sublineage: VNI strains only; subdivision levels that do not occur are
# dropped.
sublineage <- metadata %>%
  filter(.data$lineage == "VNI") %>%
  select(all_of(c("strain", "vni_subdivision"))) %>%
  column_to_rownames(var = "strain") %>%
  droplevels()

lineage <- metadata %>%
  select(all_of(c("strain", "lineage"))) %>%
  column_to_rownames(var = "strain")

dataset <- metadata %>%
  select(all_of(c("strain", "dataset"))) %>%
  column_to_rownames(var = "strain")

source <- metadata %>%
  select(all_of(c("strain", "source"))) %>%
  column_to_rownames(var = "strain")

Duplications

Code
# Read the tab-separated duplications table (header in the first row).
# Text columns are read as factors (stringsAsFactors = TRUE).
duplications <- read.delim(
  file = duplications_path,
  header = TRUE,
  sep = "\t",
  stringsAsFactors = TRUE
)
Code
# One row per distinct (strain, chromosome) pair in the duplications table.
duplications_full <- distinct(select(duplications, strain, chromosome))

Make matrix of duplicated chromosomes

Code
# Wide strain x chromosome table (row names = strain). A cell holds the
# chromosome name when duplications_full has a row for that strain and
# chromosome, and "Euploid" otherwise.
dup_chroms <- duplications_full %>%
  select(strain, chromosome) %>%
  mutate(duplicated_full = 1) %>%
  # Sorting first makes the pivoted columns appear in chromosome order.
  arrange(chromosome) %>%
  pivot_wider(
    names_from = chromosome,
    values_from = duplicated_full,
    values_fill = 0
  ) %>%
  column_to_rownames(var = "strain") %>%
  mutate(across(
    everything(),
    function(flag) ifelse(flag == 1, cur_column(), "Euploid")
  ))

# Strains listed in the metadata that have no row in duplications_full.
euploid_strain <- metadata %>%
  select(strain) %>%
  filter(!(strain %in% duplications_full$strain))

# Give those strains the same chromosome columns, all set to "Euploid".
for (chrom in names(dup_chroms)) {
  euploid_strain[[chrom]] <- "Euploid"
}

# Final table: strains without duplications first, then the duplicated ones.
dup_chroms <- bind_rows(
  column_to_rownames(euploid_strain, var = "strain"),
  dup_chroms
)

Tree

Code
# Parse the merged tree from its Newick file.
tree <- read.tree(file = merged_tree_path)

Remove tips that are not in metadata$strain

Code
# Drop every tip whose label does not appear in metadata$strain.
tree <- drop.tip(
  tree,
  tip = setdiff(tree$tip.label, metadata$strain)
)

Plots

Code
# Colour vector for the duplication columns: chrom_colors plus a light grey
# entry named "Euploid".
# NOTE(review): chrom_colors is not defined in the visible code; presumably it
# comes from scripts/metadata_colors.R -- confirm.
chrom_dup_colors <- c(
  chrom_colors,
  "Euploid" = "grey93"
)

Tree of all samples with duplications of all chromosomes

Tree of all samples with duplications of chromosomes 12 and 13

Subset the dup_chroms table to only the columns for chromosomes 12 and 13 (all strains are kept)

Code
# Keep only the chr12 and chr13 columns; every strain (row) is retained.
dup_chroms_12_13 <- select(dup_chroms, chr12, chr13)

Tree with only the samples that have duplications and the references

Code
# Strains to keep: every strain that occurs in duplications_full, plus three
# strains listed by name (H99, Bt22, Bt81).
# unique(as.character(...)) is used instead of levels() so that this works
# whether `strain` is a factor or a character column, and so that unused factor
# levels are never picked up. levels() returns NULL for a character column,
# which would silently leave only the three named strains.
# keep_strains is used only for membership tests below, so element order is
# irrelevant.
keep_strains <- c(
  unique(as.character(duplications_full$strain)),
  "H99", "Bt22", "Bt81"
)

# Tree restricted to the kept strains.
tree_dups <- drop.tip(tree, setdiff(tree$tip.label, keep_strains))

# Restrict the sublineage annotation to the kept strains and drop subdivision
# levels that no longer occur.
sublineage <- sublineage %>%
  filter(rownames(.) %in% keep_strains) %>%
  droplevels()

Dataset, lineage, sublineage, source, duplications

Lineage, sublineage, duplications

Lineage, duplications, sublineage

Lineage, duplications

VNII = 94 VNBI = 55 VNBII = 52 VNI = 57